What is the relationship between the reasons school leaders cite for innovating and the future practices they hope to implement?

Correlation plot

For this guiding question, I look only at 2024 data. Below, you’ll see on the each pilot practice (what schools hope to implement in the next 1-5 years) on the left and catalyst for innovation, or the reason(s) school leaders cite for innovating, across the top.

# Correlations between catalyst variables and reasons for innovating
catalysts <- full %>% 
  select(starts_with("catalyst_")) %>% 
  select(-starts_with("catalyst_key"), -contains("_other")) 

pilots <- full %>% 
  select(starts_with("pilot_")) %>% 
  select(-contains("_other")) 
remove_zero_variance <- function(df) {
  # Apply function to each column
  non_constant_cols <- df[, apply(df, 2, sd) != 0]
  return(non_constant_cols)
}

catalysts_clean <- remove_zero_variance(catalysts)
pilots_clean <- remove_zero_variance(pilots) #just one removed

reasons_and_futures <- cor(pilots_clean, catalysts_clean)
rfdf <- data.frame(reasons_and_futures)
corrplot::corrplot(reasons_and_futures, method = "color", tl.col = "black", tl.cex = 0.3, c1.cex = 0.3, number.cex = 0.2, diag = TRUE)

Kind of clunky. I’m going to modify to fix the labels manually and then expand the figure to better see the correlations.

# rename catalysts
rename_catalyst_mapping <- setNames(
  dictionary$clean_labels[dictionary$variable_name %in% colnames(catalysts)], 
  dictionary$variable_name[dictionary$variable_name %in% colnames(catalysts)]
)

# rename pilots
rename_pilot_mapping <- setNames(
  dictionary$clean_labels[dictionary$variable_name %in% colnames(pilots)], 
  dictionary$variable_name[dictionary$variable_name %in% colnames(pilots)]
)

# Correlations between catalyst variables and reasons for innovating
catalysts <- full %>% 
  select(starts_with("catalyst_")) %>% 
  select(-starts_with("catalyst_key"), -contains("_other")) %>% 
  rename_with(~ rename_catalyst_mapping[.x], .cols = everything())

pilots <- full %>% 
  select(starts_with("pilot_")) %>% 
  select(-contains("_other")) %>% 
  rename_with(~ rename_pilot_mapping[.x], .cols = everything())

catalysts_clean <- remove_zero_variance(catalysts)
pilots_clean <- remove_zero_variance(pilots) #just one removed

reasons_and_futures <- cor(pilots_clean, catalysts_clean)
rfdf <- data.frame(reasons_and_futures)
corrplot::corrplot(reasons_and_futures, method = "color", tl.col = "black", tl.cex = 0.6, c1.cex = 0.3, number.cex = 0.2, diag = TRUE, cl.pos = "n")

Correlation table

rfdf %>% 
  round(2) %>% 
  datatable()

The following are related to each catalyst. Correlations above 0.15 are noted.

  • External: Assessments career (0.20), MTSS academics (0.15), assessments bilingual (0.15), multi-age (0.15)
  • Student agency: student conferences (0.18) assessments agency (0.17)
  • Teacher agency: hiring equity (0.22), enriched virtual (0.18), colead teachers (0.17)
  • Demographics: sel integrated (0.24), student conferences (0.18), advancement mastery (0.17), colead industry (0.17), antiracist (0.16), makerspace (0.15)
  • Internal: colead teachers(0.22), competency framework (0.18), extended learning (0.15), sel integrated (0.15)
  • Stakeholders: colead CBO (0.18), antiracist (0.17)
  • Inequities: UDL (0.15)
  • Absence: expanded open hours (0.18)
  • Covid: MTSS academics (0.16), interdisciplinary (0.15), multi-age (0.15)
  • No notable correlations for following catalysts: cutting edge, mental health

How have the main reasons for innovating changed since 2021?

catalyst_all_years <- import(here("data/longitudinal", "longitudinal_data.csv")) %>% 
  select(year, school_id, starts_with("catalyst")) %>% 
  filter(year == 2021 | year == 2024) 

catalyst_all_years_long <- catalyst_all_years %>% 
  select(-contains("_other"), -contains("_key")) %>% 
  pivot_longer(cols = contains("catalyst"),
               names_to = "catalyst",
               values_to = "selected",
               names_prefix = "catalyst_")

From Cycle 2, I had started to create this graph. This shows catalyst selection across schools.

library(ggrepel)

catalyst_all_years_long <- catalyst_all_years_long %>% 
  group_by(catalyst, year) %>% 
  summarize(total_selected = sum(selected))

label_positions <- catalyst_all_years_long %>% 
  group_by(catalyst) %>% 
  summarize(year = 2021, selected = first(total_selected))

catalyst_all_years_long %>% 
  ggplot(aes(x = year, y = total_selected, color = catalyst)) +
  geom_line(linewidth = 1) +
  geom_point(size = 3) +
  geom_label_repel(data = label_positions, aes(y = selected, label = catalyst)) +
  scale_x_continuous(breaks = c(2021, 2024)) +
  labs(x = "Year",
       y = "Number of Times Selected",
       title = "Catalyst Selection by Year \n Across Schools") +
  theme(legend.position = "none")

Version using percentages is added here.

n_2021 = 232
n_2024 = 189

catalyst_all_years_long <- catalyst_all_years_long %>% 
  mutate(pct = case_when(year == 2021 ~ total_selected/n_2021,
                          year == 2024 ~ total_selected/n_2024))

label_positions <- catalyst_all_years_long %>% 
  group_by(catalyst) %>% 
  summarize(year = 2021, pct = first(pct))

catalyst_all_years_long  %>% 
  ggplot(aes(x = year, y = pct, color = catalyst)) +
  geom_line(linewidth = 1) +
  geom_point(size = 3) +
  geom_label_repel(data = label_positions, aes(y = pct, label = catalyst)) +
  scale_x_continuous(breaks = c(2021, 2024)) +
  scale_y_continuous(labels = scales::percent_format()) +
  labs(x = "Year",
       y = "Percent of Times Selected",
       title = "Catalyst Selection by Year \n Across Schools") +
  theme(legend.position = "none")

What about schools who responded to the survey both years? So, looking within schools? Let’s narrow the sample and check that out.

catalyst_all_years_within <- catalyst_all_years %>% 
  filter(duplicated(school_id) | duplicated(school_id, fromLast = TRUE)) %>% 
  select(-contains("_other"), -contains("_key")) %>% 
  pivot_longer(cols = contains("catalyst"),
               names_to = "catalyst",
               values_to = "selected",
               names_prefix = "catalyst_") %>% 
  group_by(catalyst, year) %>% 
  summarize(total_selected = sum(selected))

label_positions <- catalyst_all_years_within %>% 
  group_by(catalyst) %>% 
  summarize(year = 2021, selected = first(total_selected))

catalyst_all_years_within %>% 
  ggplot(aes(x = year, y = total_selected, color = catalyst)) +
  geom_line(linewidth = 1) +
  geom_point(size = 3) +
  geom_label_repel(data = label_positions, aes(y = selected, label = catalyst)) +
  scale_x_continuous(breaks = c(2021, 2024)) +
  labs(x = "Year",
       y = "Number of Times Selected",
       title = "Catalyst Selection by Year \n Within Schools") +
  theme(legend.position = "none")

Now, in percentage. The total value here will be different than in the across graph since not ever school answered every year. Only 82 did.

n_within = 82

catalyst_all_years_within <- catalyst_all_years_within %>% 
  mutate(pct = total_selected/n_within)

label_positions <- catalyst_all_years_within %>% 
  group_by(catalyst) %>% 
  summarize(year = 2021, pct = first(pct))

catalyst_all_years_within  %>% 
  ggplot(aes(x = year, y = pct, color = catalyst)) +
  geom_line(linewidth = 1) +
  geom_point(size = 3) +
  geom_label_repel(data = label_positions, aes(y = pct, label = catalyst)) +
  scale_x_continuous(breaks = c(2021, 2024)) +
  scale_y_continuous(labels = scales::percent_format()) +
  labs(x = "Year",
       y = "Percent of Times Selected",
       title = "Catalyst Selection by Year \n Within Schools") +
  theme(legend.position = "none")

Create a version similar to the most added practices figures.

catalyst_all_years_within_change <- catalyst_all_years_within %>% 
  pivot_wider(names_from = year,
              values_from = c(total_selected, pct)) 

catalyst_all_years_within_change %>% 
  ggplot(aes(x = total_selected_2021, xend = total_selected_2024, y = reorder(catalyst, total_selected_2024), yend = catalyst)) +
  geom_segment(color = "black", linetype = "dotted") +
  geom_point(aes(x = total_selected_2021), color = "red") +
  geom_point(aes(x = total_selected_2024), color = "blue") +
  guides(col = guide_legend(nrow = 1, title = NULL)) + 
  bar_x_scale_count +
  geom_text(
    aes(x = (total_selected_2021 + total_selected_2024)/2, label = paste("Δ =", total_selected_2024 - total_selected_2021), color = factor(sign(total_selected_2024 - total_selected_2021))),
    nudge_y = .3,
    hjust = 0,
    show.legend = FALSE
  ) +
  scale_color_manual(
    values = c("red", "blue"),
    labels = c("2021", "2024")
  ) +
  labs(
    y = "Catalysts",
    x = "Times Selected",
    title = "Catalyst Selection from 2021 to 2024 Within Schools"
  ) +
  theme(
    panel.grid.major.y = element_blank(),
    legend.position = "bottom",
    axis.text.y = element_text(size = rel(0.6))
  )

How many schools selected just one catalyst in particular?

one_cat <- catalyst_all_years %>% 
  select(-contains("_other"), -contains("_key")) %>% 
  mutate(cat_select = rowSums(across(3:11))) %>% 
  filter(cat_select == 1) %>% 
  pivot_longer(cols = contains("catalyst"),
               names_to = "catalyst",
               values_to = "selected",
               names_prefix = "catalyst_") %>% 
  filter(selected == 1) %>% 
  group_by(year, catalyst) %>% 
  summarise(count = n())
one_cat %>% 
  ggplot(aes(x = count, y = catalyst, fill = as.factor(year))) +
  geom_col(position = "dodge") +
  scale_fill_manual(values = transcend_cols) +
  labs(title = "Solo-Select Catalysts",
       x = "Catalyst",
       y = "Count",
       legend = "Year")

Also going to represent this information here in a change plot. Note, this is not going to be within schools since there is only once school that selected one catalyst each year. Also note, covid and student_agency were exclusive to 2021.

one_cat %>% 
  pivot_wider(names_from = "year",
              values_from = "count") %>% 
  ggplot(aes(x = `2021`, xend = `2024`, y = reorder(catalyst, `2024`), yend = catalyst)) +
  geom_segment(color = "black", linetype = "dotted") +
  geom_point(aes(x = `2021`), color = "red") +
  geom_point(aes(x = `2024`), color = "blue") +
  geom_point(x = 1, y = "internal", color = "purple") +
  geom_point(x = 1, y = "external", color = "purple") +
  guides(col = guide_legend(nrow = 1, title = NULL)) + 
  # bar_x_scale_count +
  geom_text(
    aes(x = (`2021` + `2024`)/2 -1, label = paste("Δ =", `2024` - `2021`), color = factor(sign(`2024` - `2021`))),
    nudge_y = .3,
    hjust = 0,
    show.legend = FALSE
  )  +
  labs(
    y = "Catalysts",
    x = "Times Selected",
    title = "Solo-Select Catalyst Selection \nfrom 2021 to 2024 Across Schools"
  ) +
  theme(
    panel.grid.major.y = element_blank(),
    legend.position = "bottom",
    axis.text.y = element_text(size = rel(0.6))
  )

How often have schools that responded in both 2021 and 2024 changed their key catalysts?

cat_by_year <- catalyst_all_years %>% 
  select(year, school_id, starts_with("catalyst_key"), -contains("_other")) %>% 
  pivot_longer(cols = contains("catalyst"),
               names_to = "catalyst_key",
               values_to = "selected",
               names_prefix = "catalyst_key_") %>% 
  pivot_wider(names_from = year,
              values_from = selected) %>% 
  na.omit() %>%  #omit schools that only answered one year
  mutate(selected = case_when(`2021` == 0 & `2024` == 0 ~ "neither year",
                            `2021` == 0 & `2024` == 1 ~ "added",
                            `2021` == 1 & `2024` == 0 ~ "dropped",
                            `2021` == 1 & `2024` == 1 ~ "both years")) %>% 
  group_by(catalyst_key, selected) %>% 
  summarise(n = n())
cat_by_year %>% 
  ggplot(aes(x = n, y = catalyst_key, fill = selected)) +
  geom_col(position = "dodge") +
  scale_fill_manual(values = transcend_cols) +
  labs(title = "Catalyst Key Selection for Schools with Both Years of Data",
       x = "Number of Schools Selecting",
       y = "Catalyst",
       legend = "Status")

This is from Cycle 2. What if we looked at schools who only selected one catalyst?

one_cat_change <- catalyst_all_years %>% 
  select(-contains("_other"), -contains("_key")) %>% 
  mutate(cat_select = rowSums(across(3:11))) %>% 
  filter(cat_select == 1) %>% 
  pivot_longer(cols = contains("catalyst"),
               names_to = "catalyst",
               values_to = "selected",
               names_prefix = "catalyst_") %>% 
  filter(selected == 1)

Looks like there was just one school who meets this criteria. This school went from focusing on inequity to demographics.

I’m continuing to explore this question, this time with a vertical barchart of adds/drops only. Note again, these are schools who responded both years.

cat_by_year %>% 
  pivot_wider(names_from = "selected",
              values_from = "n") %>% 
  mutate(dropped = -1*dropped) %>% 
  ggplot(aes(x = reorder(catalyst_key, -dropped))) +
  geom_col(aes(y = added), fill = transcend_cols[1]) +
  geom_col(aes(y = dropped), fill = transcend_cols[3]) +
  geom_hline(yintercept = 0, linetype = 2) +
  coord_flip() +
  labs(title = "Catalyst Key Adds/Drops for \nSchools Answering Both Years",
       y = "Drops (Negative) and Adds (Positive)",
       x = "Catalyst")

For leaders who were concerned about future sustainability, what reasons did they cite for their concern and did these reasons differ across schools?

2024 only

barriers <- full %>% 
  select(school_id, starts_with("barrier"))

First, am curious how the free response category responded. I generated a wordcloud of these for barriers below.

library(wordcloud)
library(tm)

responses <- barriers$barrier_other_text

# Create a text corpus
corpus <- Corpus(VectorSource(responses))

# Text preprocessing
corpus <- tm_map(corpus, content_transformer(tolower))  # Convert to lower case
corpus <- tm_map(corpus, removePunctuation)             # Remove punctuation
corpus <- tm_map(corpus, removeNumbers)                 # Remove numbers
corpus <- tm_map(corpus, removeWords, stopwords("english"))  # Remove stopwords
corpus <- tm_map(corpus, stripWhitespace)               # Strip whitespace

# Create a document-term matrix
dtm <- TermDocumentMatrix(corpus)

# Convert the matrix to a dataframe
matrix <- as.matrix(dtm)
word_freqs <- sort(rowSums(matrix), decreasing=TRUE)
data <- data.frame(word=names(word_freqs), freq=word_freqs)

# Generate the wordcloud
set.seed(1234) # For reproducibility
wordcloud(words = data$word, freq = data$freq, min.freq = 1,
          max.words=200, random.order=FALSE, rot.per=0.35, 
          colors=brewer.pal(8, "Dark2"))

While we’re here, I’m also including a wordcloud for the catalysts for innovation here.

responses <- full$catalyst_other_text

# Create a text corpus
corpus <- Corpus(VectorSource(responses))

# Text preprocessing
corpus <- tm_map(corpus, content_transformer(tolower))  # Convert to lower case
corpus <- tm_map(corpus, removePunctuation)             # Remove punctuation
corpus <- tm_map(corpus, removeNumbers)                 # Remove numbers
corpus <- tm_map(corpus, removeWords, stopwords("english"))  # Remove stopwords
corpus <- tm_map(corpus, stripWhitespace)               # Strip whitespace

# Create a document-term matrix
dtm <- TermDocumentMatrix(corpus)

# Convert the matrix to a dataframe
matrix <- as.matrix(dtm)
word_freqs <- sort(rowSums(matrix), decreasing=TRUE)
data <- data.frame(word=names(word_freqs), freq=word_freqs)

# Generate the wordcloud
set.seed(1234) # For reproducibility
wordcloud(words = data$word, freq = data$freq, min.freq = 1,
          max.words=200, random.order=FALSE, rot.per=0.35, 
          colors=brewer.pal(8, "Dark2"))

Back to barriers –

For the rest of the options, here is what leaders selected.

barriers <- barriers %>% 
  select(-contains("_other")) %>% 
  pivot_longer(cols = contains("barrier"),
               names_to = "barrier",
               values_to = "selected",
               names_prefix = "barrier_") %>% 
  filter(selected == 1) %>% 
  group_by(barrier) %>% 
  summarize(n = n())
barriers %>% 
  ggplot(aes(reorder(barrier, n), n)) +
  geom_col(aes(fill = barrier)) +
  scale_fill_manual(values = c(transcend_cols, transcend_cols2)) +
  labs(title = "2024 Barriers to Sustainability",
       x = "Barrier",
       y = "Count") +
  theme(legend.position = "none") +
  coord_flip()